library(ArchR)
library(tidyverse)
proj <- loadArchRProject("12_Copy/")

proj <- addKathiScoreMatrix(
  input = proj, 
  genes = getGenes(proj),
  peaks = getPeakSet(proj),
  geneModel = "exp(-abs(x)/5000)",
  matrixName = "GeneScoreMatrix",
  extendUpstream = c(1000, 100000),
  extendDownstream = c(1000, 100000),
  #geneUpstream = 5000, #New Param
  #geneDownstream = 0, #New Param
  useGeneBoundaries = TRUE,
  useTSS = TRUE, #New Param
  extendTSS = FALSE,
  tileSize = 500,
  ceiling = 4,
  geneScaleFactor = 5, #New Param
  scaleTo = 10000,
  excludeChr = c("chrY", "chrM"),
  blacklist = getBlacklist(input),
  threads = getArchRThreads(),
  parallelParam = NULL,
  subThreading = FALSE,
  force = TRUE,
  logFile = createLogFile(".addKathiGeneScoreMat"))
proj <- loadArchRProject("12_Copy/")

i = 1
ArrowFiles =  getArrowFiles(proj)
genes = getGenes(proj)
peaks = getPeakSet(proj)
geneModel = "exp(-abs(x)/5000)"
matrixName = "GeneScoreMatrix"
extendUpstream = c(1000, 100000)
extendDownstream = c(1000, 100000)
geneUpstream = 5000 #New Param
geneDownstream = 0 #New Param
useGeneBoundaries = TRUE
useTSS = TRUE #New Param
extendTSS = FALSE
tileSize = 500
ceiling = 4
geneScaleFactor = 5 #New Param
scaleTo = 10000
excludeChr = c("chrY","chrM")
blacklist = getBlacklist(proj)
cellNames = NULL
allCells = NULL
force = FALSE
tmpFile = NULL
subThreads = 1
tstart = NULL
logFile = NULL
# addGeneScoreMatrix <- function(
#   input = NULL,
#   genes = getGenes(input),
#   geneModel = "exp(-abs(x)/5000) + exp(-1)",
#   matrixName = "GeneScoreMatrix",
#   extendUpstream = c(1000, 100000),
#   extendDownstream = c(1000, 100000),
#   geneUpstream = 5000, #New Param
#   geneDownstream = 0, #New Param
#   useGeneBoundaries = TRUE,
#   useTSS = FALSE, #New Param
#   extendTSS = FALSE,
#   tileSize = 500,
#   ceiling = 4,
#   geneScaleFactor = 5, #New Param
#   scaleTo = 10000,
#   excludeChr = c("chrY", "chrM"),
#   blacklist = getBlacklist(input),
#   threads = getArchRThreads(),
#   parallelParam = NULL,
#   subThreading = TRUE,
#   force = FALSE,
#   logFile = createLogFile("addGeneScoreMatrix")
#   ){


  .validInput(input = input, name = "input", valid = c("ArchRProj", "character"))
  .validInput(input = genes, name = "genes", valid = c("GRanges"))
  .validInput(input = geneModel, name = "geneModel", valid = c("character"))
  .validInput(input = matrixName, name = "matrixName", valid = c("character"))
  .validInput(input = extendUpstream, name = "extendUpstream", valid = c("integer"))
  .validInput(input = extendDownstream, name = "extendDownstream", valid = c("integer"))
  .validInput(input = tileSize, name = "tileSize", valid = c("integer"))
  .validInput(input = ceiling, name = "ceiling", valid = c("integer"))
  .validInput(input = useGeneBoundaries, name = "useGeneBoundaries", valid = c("boolean"))
  .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric"))
  .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null"))
  .validInput(input = blacklist, name = "blacklist", valid = c("GRanges", "null"))
  .validInput(input = threads, name = "threads", valid = c("integer"))
  .validInput(input = parallelParam, name = "parallelParam", valid = c("parallelparam", "null"))
  .validInput(input = force, name = "force", valid = c("boolean"))
  .validInput(input = logFile, name = "logFile", valid = c("character"))

Original + plots

proj <- loadArchRProject("12_Copy/")

i = 1
ArrowFiles =  getArrowFiles(proj)
genes = getGenes(proj)
peaks = getPeakSet(proj)
geneModel = "exp(-abs(x)/5000)"
matrixName = "GeneScoreMatrix"
extendUpstream = c(1000, 100000)
extendDownstream = c(1000, 100000)
geneUpstream = 5000 #New Param
geneDownstream = 0 #New Param
useGeneBoundaries = TRUE
useTSS = TRUE #New Param
extendTSS = FALSE
tileSize = 500
ceiling = 4
geneScaleFactor = 5 #New Param
scaleTo = 10000
excludeChr = c("chrY","chrM")
blacklist = getBlacklist(proj)
cellNames = NULL
allCells = NULL
force = FALSE
tmpFile = NULL
subThreads = 1
tstart = NULL
logFile = NULL
input = proj
logFile = createLogFile("addGeneScoreMatrix")
matrixName <- ArchR:::.isProtectedArray(matrixName, exclude = "GeneScoreMatrix")

if(inherits(input, "ArchRProject")){
  ArrowFiles <- getArrowFiles(input)
  allCells <- rownames(getCellColData(input))
  outDir <- getOutputDirectory(input)
}else if(inherits(input, "character")){
  outDir <- ""
  ArrowFiles <- input
  allCells <- NULL
}else{
  stop("Error Unrecognized Input!")
}
if(!all(file.exists(ArrowFiles))){
  stop("Error Input Arrow Files do not all exist!")
}

if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){
  stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!")
}

#ArchR:::.startLogging(logFile = logFile)
#ArchR:::.logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneScoreMatrix Input-Parameters", logFile = logFile)

#Valid GRanges
genes <- ArchR:::.validGRanges(genes)

# #Add args to list
# args <- mget(names(formals()),sys.frame(sys.nframe()))#as.list(match.call())
# args$ArrowFiles <- ArrowFiles
# args$allCells <- allCells
# args$X <- seq_along(ArrowFiles)
# args$FUN <- .addGeneScoreMat
# args$registryDir <- file.path(outDir, "GeneScoresRegistry")
# args$logFile <- logFile
# 
# if(subThreading){
#   h5disableFileLocking()
# }else{
#   args$threads <- length(inputFiles)
# }
# 
# #Remove Input from args
# args$input <- NULL
# 
# #Run With Parallel or lapply
# outList <- .batchlapply(args)
# 
# if(subThreading){
#   h5enableFileLocking()
# }
# 
# .endLogging(logFile = logFile)
# 
# if(inherits(input, "ArchRProject")){
# 
#   return(input)
# 
# }else{
# 
#   return(unlist(outList))
# 
# }
#proj <- loadArchRProject("12_Ricards_peaks_ChromVar/")
#saveArchRProject(proj, "12_Copy")
#proj <- loadArchRProject("12_Copy/")
input = proj
ArrowFiles <- getArrowFiles(proj)
geneModel = "exp(-abs(x)/5000)"
matrixName = "GeneScoreMatrix"
genes = getGenes(proj)
useTSS = TRUE
extendUpstream = c(1000, 100000)
extendDownstream = c(1000, 100000)
useTSS = TRUE
tileSize = 500
ceiling = 4
blacklist = getBlacklist(proj)
scaleTo = 10000
excludeChr = c("chrY","chrM")
geneScaleFactor = 5
cellNames = NULL
allCells = NULL
force = FALSE
tmpFile = NULL
subThreads = 1
tstart = NULL
logFile = NULL
useGeneBoundaries = TRUE
.addGeneScoreMat <- function(
  i = NULL,
  ArrowFiles = NULL,
  genes = NULL,
  geneModel = "exp(-abs(x)/5000) + exp(-1)",
  matrixName = "GeneScoreMatrix",
  extendUpstream = c(1000, 100000),
  extendDownstream = c(1000, 100000),
  geneUpstream = 5000, #New Param
  geneDownstream = 0, #New Param
  useGeneBoundaries = TRUE,
  useTSS = FALSE, #New Param
  extendTSS = TRUE,
  tileSize = 500,
  ceiling = 4,
  geneScaleFactor = 5, #New Param
  scaleTo = 10000,
  excludeChr = c("chrY","chrM"),
  blacklist = NULL,
  cellNames = NULL,
  allCells = NULL,
  force = FALSE,
  tmpFile = NULL,
  subThreads = 1,
  tstart = NULL,
  logFile = NULL
  ){

  .validInput(input = i, name = "i", valid = c("integer"))
  .validInput(input = ArrowFiles, name = "ArrowFiles", valid = c("character"))
  .validInput(input = genes, name = "genes", valid = c("GRanges"))
  .validInput(input = geneModel, name = "geneModel", valid = c("character"))
  .validInput(input = matrixName, name = "matrixName", valid = c("character"))
  .validInput(input = extendUpstream, name = "extendUpstream", valid = c("integer"))
  .validInput(input = extendDownstream, name = "extendDownstream", valid = c("integer"))
  .validInput(input = tileSize, name = "tileSize", valid = c("integer"))
  .validInput(input = ceiling, name = "ceiling", valid = c("integer"))
  .validInput(input = useGeneBoundaries, name = "useGeneBoundaries", valid = c("boolean"))
  .validInput(input = scaleTo, name = "scaleTo", valid = c("numeric"))
  .validInput(input = excludeChr, name = "excludeChr", valid = c("character", "null"))
  .validInput(input = blacklist, name = "blacklist", valid = c("GRanges", "null"))
  .validInput(input = cellNames, name = "cellNames", valid = c("character", "null"))
  .validInput(input = allCells, name = "allCells", valid = c("character", "null"))
  .validInput(input = force, name = "force", valid = c("boolean"))
  .validInput(input = tmpFile, name = "tmpFile", valid = c("character", "null"))

  if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){
    stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!")
  }
  

Compute for only one sample:

i = 1
ArrowFile <- ArrowFiles[i]
sampleName <- ArchR:::.sampleName(ArrowFile)

if(is.null(tmpFile)){
  tmpFile <- ArchR:::.tempfile(pattern = paste0("tmp-", ArchR:::.sampleName(ArrowFile)))
}

#Check
o <- h5closeAll()
#o <- ArchR:::.createArrowGroup(ArrowFile = ArrowFile, group = matrixName, force = force, logFile = logFile)

# remove any chromosomes which we do not want to inlcude (X and Y in this case)
geneRegions <- genes[BiocGenerics::which(seqnames(genes) %bcni% excludeChr)]
print(paste0("Before filtering out the X and Y Chromosome we have ", length(genes), " genes, afterwards we have ", length(geneRegions), "."))
## [1] "Before filtering out the X and Y Chromosome we have 24368 genes, afterwards we have 24334."
geneRegions %>% head
## GRanges object with 6 ranges and 2 metadata columns:
##       seqnames          ranges strand |     gene_id      symbol
##          <Rle>       <IRanges>  <Rle> | <character> <character>
##   [1]     chr1 3214482-3671498      - |      497097        Xkr4
##   [2]     chr1 4290846-4409241      - |       19888         Rp1
##   [3]     chr1 4490928-4497354      - |       20671       Sox17
##   [4]     chr1 4773198-4785726      - |       27395      Mrpl15
##   [5]     chr1 4807893-4846735      + |       18777      Lypla1
##   [6]     chr1 4857694-4897909      + |       21399       Tcea1
##   -------
##   seqinfo: 21 sequences from mm10 genome

# add information on chromsome names to the GRanges object
seqlevels(geneRegions) <- as.character(unique(seqnames(geneRegions)))
geneRegions <- geneRegions[!is.na(mcols(geneRegions)$symbol)]
print(paste0("After removing missing gene names, we are left with ", length(geneRegions), " genes"))
## [1] "After removing missing gene names, we are left with 24333 genes"

#Create Gene Regions Then Remove Strand Column
if(useTSS){
  ArchR:::.logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = TRUE"))
  distMethod <- "GenePromoter"
  
  geneRegions$geneStart <- start(resize(geneRegions, 1, "start"))
  geneRegions$geneEnd <- start(resize(geneRegions, 1, "end"))
  # in the ranges column we want to have the gene start coordinates
  geneRegions <- resize(geneRegions, 1, "start")
  
  # this will be ignored for now, since we only want to use the TSS as 1bp size
  # if(extendTSS){
  #   geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream)
  # }
  
  geneRegions$geneWeight <- geneScaleFactor
  
  
# This will be ignored for now, since we do not want to use gene bodies, but only TSS
}else{
  ArchR:::.logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = FALSE"))
  distMethod <- "GeneBody"
  geneRegions$geneStart <- start(resize(geneRegions, 1, "start"))
  geneRegions$geneEnd <- start(resize(geneRegions, 1, "end"))
  geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream)
  m <- 1 / width(geneRegions)
  geneRegions$geneWeight <- 1 + m * (geneScaleFactor - 1) / (max(m) - min(m))
}

#ArchR:::.logDiffTime(sprintf("Computing Gene Scores using distance relative to %s! ", distMethod), tstart, logFile = logFile)

#Add Gene Index For ArrowFile
geneRegions <- sort(sortSeqlevels(geneRegions), ignore.strand = TRUE)
#ArchR:::.logThis(geneRegions, paste0(sampleName, " .addGeneScoreMat geneRegions"), logFile = logFile)


# split geneRegions into a list of GRanges object, one for each chromosome 
#and add indices for each chromosome
geneRegions <- split(geneRegions, seqnames(geneRegions))
geneRegions <- lapply(geneRegions, function(x){
  mcols(x)$idx <- seq_along(x)
  return(x)
})

#Blacklist Split
if(!is.null(blacklist)){
  if(length(blacklist) > 0){
    # create a list of length of number of chromosomes
    # each element of the list contains the GRanges for the corresponding chromosome only
    blacklist <- split(blacklist, seqnames(blacklist))
  }
}

#Get all cell ids before constructing matrix
if(is.null(cellNames)){
  cellNames <- ArchR:::.availableCells(ArrowFile)
}

if(!is.null(allCells)){
  cellNames <- cellNames[cellNames %in% allCells]
}

tstart <- Sys.time()

The function extendGR(gr, upstream, downstream) from ArchR is handy whenever you want to extend genomic regions in a GRanges object.


  #########################################################################################################
  #First we will write gene scores to a temporary path! rhdf5 delete doesnt actually delete the memory!
  #########################################################################################################
totalGS <- ArchR:::.safelapply(seq_along(geneRegions), function(z){ # we loop over the z chromosomes here
  # geneRegions is a list with one Granges object for each chromosome

  totalGSz <- tryCatch({

    ArchR:::.logDiffTime(sprintf("Creating Temp GeneScoreMatrix for %s, Chr (%s of %s)!", sampleName, z, length(geneRegions)), 
      tstart, verbose = FALSE, logFile = logFile)

    #Get Gene Starts
    # extract Granges for chromosome z
    geneRegionz <- geneRegions[[z]]
    # order the entire GRanges object according to index 
    geneRegionz <- geneRegionz[order(geneRegionz$idx)]
    # get the chromosome information for this gene Region
    chrz <- paste0(unique(seqnames(geneRegionz)))

    #Read in Fragments
    # we get a IRanges object that contains the start/end coordinate, width and 
    # the metadata column containing the sample and barcode for that sample
    frag <- ArchR:::.getFragsFromArrow(ArrowFile, chr = chrz, out = "IRanges", cellNames = cellNames)
    print("Lets have a look at the fragments:")
    frag %>% head
    
    print("Lets have a look at the fragment sizes:")
    hist(width(frag), breaks = 200, main = paste0("Fragment size/width on ",chrz) ,
                                           xlab = "Fragment size")
    # we truncate the start to the start of a 500bp tile
    fragSt <- trunc(start(frag)/tileSize) * tileSize
    # the end will be the same coordinate if it lies wihtin the same 500bp tile
    # the end will be the start coordinate of the next tile if the fragment ends in the next tile.
    fragEd <- trunc(end(frag)/tileSize) * tileSize
    # create a Rle vector which contains the number of times each cell appears in the frag IRanges object
    fragBC <- rep(S4Vectors::match(mcols(frag)$RG, cellNames), 2)
    rm(frag)
    gc()
    
    print(paste0("There are length ", length(fragSt), " start coordinates, but only ", length(unique(fragSt)), " unique inserts, meaining a lot of inserts are found in one and the same tiles." ))
    print(paste0("There are length ", length(fragEd), " start coordinates, but only ", length(unique(fragEd)), " unique inserts, meaining a lot of inserts are found in one and the same tiles." ))
    print(paste0("Out of all inserts, ", length(unique(c(fragSt,fragEd))), " are unique"))
    #Unique Inserts, sorted by coordinates
    uniqIns <- sort(unique(c(fragSt,fragEd)))
    
    # these new insertion coordinates correspond to start/end of a tile where an insertion was found
    #Construct tile by cell mat!
    # i, j and x should have the same dimensions, creating the 3 columns, row, col and value x
    matGS <- Matrix::sparseMatrix(
        i = match(c(fragSt, fragEd), uniqIns),
        j = as.vector(fragBC),
        # each entry is one, meaning that an insertion was found in this tile
        x = rep(1,  2*length(fragSt)),
        dims = c(length(uniqIns), length(cellNames))
      )  
    
    print(ggplot() + geom_bar(aes(x = matGS@x)) +
      scale_y_log10() +
      labs(title = "Number of insertions per tile before applying ceiling = 4",
           x = "number of insertion", y = "log10(count)"))
    
    
    if(!is.null(ceiling)){
      matGS@x[matGS@x > ceiling] <- ceiling # restrict the number of counts per tile to the value stored in celing (4)
    }
    
    print(ggplot() + geom_bar(aes(x = matGS@x)) +
      scale_y_log10() +
      labs(title = "Number of insertions per tile after applying ceiling = 4",
           x = "number of insertion", y = "log10(count)"))

    #Unique Tiles
    # create IRanges object for all tiles which have an insertion
    uniqueTiles <- IRanges(start = uniqIns, width = tileSize)
    
    #Clean Memory
    rm(uniqIns, fragSt, fragEd, fragBC)
    gc() 

    #Time to Overlap Gene Windows
    
    
    #### maybe for later
    if(useGeneBoundaries){

      geneStartz <- start(resize(geneRegionz, 1, "start"))
      geneEndz <- start(resize(geneRegionz, 1, "end"))

      pminGene <- pmin(geneStartz, geneEndz)
      pmaxGene <- pmax(geneStartz, geneEndz)

      idxMinus <- BiocGenerics::which(strand(geneRegionz) != "-")
  
      pReverse <- rep(max(extendDownstream), length(pminGene))
      pReverse[idxMinus] <- rep(max(extendUpstream), length(idxMinus))

      pReverseMin <- rep(min(extendDownstream), length(pminGene))
      pReverseMin[idxMinus] <- rep(min(extendUpstream), length(idxMinus))

      pForward <- rep(max(extendUpstream), length(pminGene))
      pForward[idxMinus] <- rep(max(extendDownstream), length(idxMinus))      

      pForwardMin <- rep(min(extendUpstream), length(pminGene))
      pForwardMin[idxMinus] <- rep(min(extendDownstream), length(idxMinus))      

      ################################################################
      #We will test when genes pass by another gene promoter
      ################################################################

      #Start of Range is based on the max observed gene ranged <- direction
      s <- pmax(
        c(1, pmaxGene[-length(pmaxGene)] + tileSize), 
        pminGene - pReverse
      )
      s <- pmin(pminGene - pReverseMin, s)

      #End of Range is based on the max observed gene ranged -> direction
      e <- pmin(
          c(pminGene[-1] - tileSize, pmaxGene[length(pmaxGene)] + pForward[length(pmaxGene)]), 
          pmaxGene + pForward
        )
      e <- pmax(pmaxGene + pForwardMin, e)

      extendedGeneRegion <- IRanges(start = s, end = e)

      idx1 <- which(pminGene - pReverseMin < start(extendedGeneRegion))
      if(length(idx1) > 0){
        stop("Error in gene boundaries minError")
      }

      idx2 <- which(pmaxGene + pForwardMin > end(extendedGeneRegion))
      if(length(idx2) > 0){
        stop("Error in gene boundaries maxError")
      }
     
     rm(s, e, pReverse, pReverseMin, pForward, pForwardMin, geneStartz, geneEndz, pminGene, pmaxGene)
     
     
     ###### interesting for me
    }else{ 
      
      # first we extend the gene region -> IRanges object, which contains start 
      # and end of the 200,000 bp gene regions/windows (+/- 100,000 from TSS) 
      extendedGeneRegion <- ranges(suppressWarnings(extendGR(geneRegionz, upstream = max(extendUpstream), downstream = max(extendDownstream))))

    }
    
    # Here I would like to use peaks which overlap instead of tiles
    # each row of tmp contains index of GeneRegion and the index of Tiles with 
    # which it overlaps
    tmp <- suppressWarnings(findOverlaps(extendedGeneRegion, uniqueTiles))
    tmp %>% head
    
    print(paste0("In total there are ", subjectLength(tmp), " tiles overlapping with genes found on this chromosome"))
    print(paste0("The total number of tiles found on this chromosome is ", length(uniqueTiles)))
    print(tmp %>% as.data.frame() %>% 
           group_by(queryHits) %>% # gene region
           summarize(n = n()) %>% # get number of peaks overlapping with a gene region
           ggplot() + geom_bar(aes(x = n)) +
           labs(title = "number of tiles per gene region of size +/- 100kb from TSS",
               x = "number of tiles"))
    
    print(tmp %>% as.data.frame() %>% 
           group_by(queryHits) %>% # gene region
           summarize(n = n()) %>% # get number of peaks overlapping with a gene region
           ggplot() + geom_bar(aes(x = n)) +
           labs(title = "number of tiles per gene region of size +/- 100kb from TSS",
               x = "number of tiles", y = "log10(counts)") + 
            scale_y_log10())
    
    # by extracting the indices of genes and tiles which overlap and computing 
    # the distances between them we get for each match the distance between gene and tile
    x <- distance(ranges(geneRegionz)[queryHits(tmp)], uniqueTiles[subjectHits(tmp)])
    
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of absolute distances between genes and tiles within a gene region",
           x = "distance"))
    

    #Determine Sign for Distance relative to strand (Directionality determined based on dist from gene start)
    # negative distance meaning  upstrea, positive distance downstream of gene
    
    # get Minus strand coordinates
    isMinus <- BiocGenerics::which(strand(geneRegionz) == "-")
    # subtract the gene start coordinate from the tile start coordinate -> relative distances
    signDist <- sign(start(uniqueTiles)[subjectHits(tmp)] - 
                       start(resize(geneRegionz,1,"start"))[queryHits(tmp)])
    # convert the direction of distance for all distances corresponding to the negative strand
    signDist[isMinus] <- signDist[isMinus] * -1

    #Correct the orientation for the distance!
    # x contains absolute distances, but we want to give direction to distances
    x <- x * signDist
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of relative distances between genes and tiles within a gene region",
           x = "relative distance to TSS"))
    
    #Evaluate  Input Model -> compute distance weights
    x <- eval(parse(text=geneModel))
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of distance weights",
           x = "distance weight"))
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of distance weights",
           x = "distance weight", y = log10(count)))
    
    print("Real distances: ")
    x %>% head

    #Get Gene Weights Related to Gene Width, in our case simply multiply 
    # everything by 5, because we do not use the size of the gene
    x <- x * mcols(geneRegionz)$geneWeight[queryHits(tmp)]
    
    print("Distances scaled by gene weight, which is constant in our case: ")
    x %>% head

    #Remove Blacklisted Tiles!
    if(!is.null(blacklist)){
      if(length(blacklist) > 0){
        blacklistz <- blacklist[[chrz]]
        if(is.null(blacklistz) | length(blacklistz) > 0){
          tilesBlacklist <- 1 * (!overlapsAny(uniqueTiles, ranges(blacklistz)))
          if(sum(tilesBlacklist == 0) > 0){
            x <- x * tilesBlacklist[subjectHits(tmp)] #Multiply Such That All Blacklisted Tiles weight is now 0!
          }
        }
      }
    }

    #Creating Sparse Matrix
    tmp <- Matrix::sparseMatrix(
      i = queryHits(tmp), 
      j = subjectHits(tmp), 
      x = x, 
      dims = c(length(geneRegionz), nrow(matGS))
    )

    #Calculate Gene Scores
    matGS <- tmp %*% matGS
    colnames(matGS) <- cellNames

    totalGSz <- Matrix::colSums(matGS)
    print(paste0("For chromosome ", chrz, " there are ", length(totalGSz), " cells, meaning that there will be as many total gene activity scores."))
    print(ggplot() + geom_histogram(aes(x = totalGSz), bins = 500) +
      labs(title = "Total Gene activity in each cell", x = "total gene activity"))

    #Save tmp file
    .safeSaveRDS(matGS, file = paste0(tmpFile, "-", chrz, ".rds"), compress = FALSE)

    #Clean Memory
    rm(isMinus, signDist, extendedGeneRegion, uniqueTiles)
    rm(matGS, tmp)
    gc()

    totalGSz
 
  }, error = function(e){

    errorList <- list(
      ArrowFile = ArrowFile,
      geneRegions = geneRegions,
      blacklist = blacklist,
      chr = chrz,
      totalGSz = if(exists("totalGSz", inherits = FALSE)) totalGSz else "totalGSz",
      matGS = if(exists("matGS", inherits = FALSE)) matGS else "matGS"
    )

   # ArchR:::.logError(e, fn = "ArchR:::.addGeneScoreMat TmpGS", info = sampleName, errorList = errorList, logFile = logFile)

  })

  totalGSz

})#, threads = subThreads) %>% Reduce("+", .)
## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 14266647 start coordinates, but only 369018 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 14266647 start coordinates, but only 368956 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 371687 are unique"

## [1] "In total there are 371687 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 371687"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 16815896 start coordinates, but only 340970 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 16815896 start coordinates, but only 340942 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 342982 are unique"

## [1] "In total there are 342982 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 342982"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 9013671 start coordinates, but only 220405 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 9013671 start coordinates, but only 220419 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 222711 are unique"

## [1] "In total there are 222711 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 222711"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 9474237 start coordinates, but only 223417 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 9474237 start coordinates, but only 223370 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 225269 are unique"

## [1] "In total there are 225269 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 225269"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 8475666 start coordinates, but only 222157 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 8475666 start coordinates, but only 222222 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 224609 are unique"

## [1] "In total there are 224609 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 224609"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 8848527 start coordinates, but only 194191 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 8848527 start coordinates, but only 194256 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 195574 are unique"

## [1] "In total there are 195574 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 195574"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 7053025 start coordinates, but only 182549 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 7053025 start coordinates, but only 182568 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 183799 are unique"

## [1] "In total there are 183799 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 183799"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 9421264 start coordinates, but only 176320 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 9421264 start coordinates, but only 176308 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 177748 are unique"

## [1] "In total there are 177748 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 177748"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 6814904 start coordinates, but only 169030 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 6814904 start coordinates, but only 169009 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 170035 are unique"

## [1] "In total there are 170035 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 170035"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 6464254 start coordinates, but only 112839 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 6464254 start coordinates, but only 112785 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 113523 are unique"

## [1] "In total there are 113523 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 113523"

## [1] "Lets have a look at the fragments:"
## [1] "Lets have a look at the fragment sizes:"

## [1] "There are length 4555885 start coordinates, but only 272415 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "There are length 4555885 start coordinates, but only 272527 unique inserts, meaining a lot of inserts are found in one and the same tiles."
## [1] "Out of all inserts, 279601 are unique"

## [1] "In total there are 279601 tiles overlapping with genes found on this chromosome"
## [1] "The total number of tiles found on this chromosome is 279601"

#########################################################################################################
#Organize info for ArchR Arrow
#########################################################################################################
featureDF <- Reduce("c",geneRegions) %>% 
  {data.frame(
    row.names=NULL,
    seqnames=as.character(seqnames(.)),
    start=mcols(.)$geneStart,
    end=mcols(.)$geneEnd,
    strand=as.integer(strand(.)),
    name=mcols(.)$symbol,
    idx=mcols(.)$idx,
    stringsAsFactors=FALSE)}
.logThis(featureDF, paste0(sampleName, " .addGeneScoreMat FeatureDF"), logFile = logFile)

dfParams <- data.frame(
    extendUpstream = extendUpstream,
    extendDownstream = extendDownstream,
    geneUpstream = extendUpstream,
    geneDownstream = extendDownstream,
    scaleTo = scaleTo,
    tileSize = tileSize,
    ceiling = ceiling,
    geneModel = geneModel,
    stringsAsFactors=FALSE
  )

######################################
# Initialize SP Mat Group
######################################
o <- .initializeMat(
  ArrowFile = ArrowFile,
  Group = matrixName,
  Class = "double",
  Units = "NormCounts",
  cellNames = cellNames,
  params = dfParams,
  featureDF = featureDF,
  force = TRUE
)

#Clean Memory
rm(dfParams, featureDF, genes)
gc()

#Normalize and add to Arrow File!
for(z in seq_along(geneRegions)){

  o <- tryCatch({

    #Get Chromosome
    chrz <- paste0(unique(seqnames(geneRegions[[z]])))

    .logDiffTime(sprintf("Adding GeneScoreMatrix to %s for Chr (%s of %s)!", sampleName, z, length(geneRegions)), 
      tstart, verbose = FALSE, logFile = logFile)

    #Re-Create Matrix for that chromosome!
    matGS <- readRDS(paste0(tmpFile, "-", chrz, ".rds"))
    file.remove(paste0(tmpFile, "-", chrz, ".rds"))

    #Normalize
    matGS@x <- as.numeric(scaleTo * matGS@x/rep.int(totalGS, Matrix::diff(matGS@p)))

    #Round to Reduce Digits After Final Normalization
    matGS@x <- round(matGS@x, 3)
    matGS <- Matrix::drop0(matGS)

    #Write sparseMatrix to Arrow File!
    o <- .addMatToArrow(
      mat = matGS, 
      ArrowFile = ArrowFile, 
      Group = paste0(matrixName, "/", chrz), 
      binarize = FALSE,
      addColSums = TRUE,
      addRowSums = TRUE,
      addRowVarsLog2 = TRUE #add for integration analyses
    )

    #Clean Memory
    rm(matGS)

    if(z %% 3 == 0 | z == length(geneRegions)){
      gc()
    }

  }, error = function(e){

    errorList <- list(
      ArrowFile = ArrowFile,
      geneRegions = geneRegions,
      blacklist = blacklist,
      chr = chrz,
      mat = if(exists("mat", inherits = FALSE)) mat else "mat"
    )

    .logError(e, fn = ".addGeneScoreMat AddToArrow", info = sampleName, errorList = errorList, logFile = logFile)

  })

}

return(ArrowFile)

}
i = 1
ArrowFiles =  getArrowFiles(proj)
genes = getGenes(proj)
peaks = getPeakSet(proj)
geneModel = "exp(-abs(x)/5000)"
matrixName = "GeneScoreMatrix"
extendUpstream = c(1000, 100000)
extendDownstream = c(1000, 100000)
geneUpstream = 5000 #New Param
geneDownstream = 0 #New Param
useGeneBoundaries = TRUE
useTSS = TRUE #New Param
extendTSS = FALSE
tileSize = 500
ceiling = 4
geneScaleFactor = 5 #New Param
scaleTo = 10000
excludeChr = c("chrY","chrM")
blacklist = getBlacklist(proj)
cellNames = NULL
allCells = NULL
force = FALSE
tmpFile = NULL
subThreads = 1
tstart = NULL
logFile = NULL

Adapt using Peaks

input = proj
logFile = createLogFile("addGeneScoreMatrix")
matrixName <- ArchR:::.isProtectedArray(matrixName, exclude = "GeneScoreMatrix")

if(inherits(input, "ArchRProject")){
  ArrowFiles <- getArrowFiles(input)
  allCells <- rownames(getCellColData(input))
  outDir <- getOutputDirectory(input)
}else if(inherits(input, "character")){
  outDir <- ""
  ArrowFiles <- input
  allCells <- NULL
}else{
  stop("Error Unrecognized Input!")
}
if(!all(file.exists(ArrowFiles))){
  stop("Error Input Arrow Files do not all exist!")
}

if(inherits(mcols(genes)$symbol, "list") | inherits(mcols(genes)$symbol, "SimpleList")){
  stop("Found a list in genes symbol! This is an incorrect format. Please correct your genes!")
}

#ArchR:::.startLogging(logFile = logFile)
#ArchR:::.logThis(mget(names(formals()),sys.frame(sys.nframe())), "addGeneScoreMatrix Input-Parameters", logFile = logFile)

#Valid GRanges
genes <- ArchR:::.validGRanges(genes)
peaks <- ArchR:::.validGRanges(peaks)
i = 1
ArrowFile <- ArrowFiles[i]
sampleName <- ArchR:::.sampleName(ArrowFile)

if(is.null(tmpFile)){
  tmpFile <- ArchR:::.tempfile(pattern = paste0("tmp-", ArchR:::.sampleName(ArrowFile)))
}

#Check
o <- h5closeAll()
#o <- ArchR:::.createArrowGroup(ArrowFile = ArrowFile, group = matrixName, force = force, logFile = logFile)

# remove any chromosomes which we do not want to inlcude (X and Y in this case)
geneRegions <- genes[BiocGenerics::which(seqnames(genes) %bcni% excludeChr)]
peakRegions <- peaks[BiocGenerics::which(seqnames(peaks) %bcni% excludeChr)]
print(paste0("Before filtering out the X and Y Chromosome we have ", length(genes), " genes, afterwards we have ", length(geneRegions), "."))
## [1] "Before filtering out the X and Y Chromosome we have 24368 genes, afterwards we have 24334."
geneRegions %>% head
## GRanges object with 6 ranges and 2 metadata columns:
##       seqnames          ranges strand |     gene_id      symbol
##          <Rle>       <IRanges>  <Rle> | <character> <character>
##   [1]     chr1 3214482-3671498      - |      497097        Xkr4
##   [2]     chr1 4290846-4409241      - |       19888         Rp1
##   [3]     chr1 4490928-4497354      - |       20671       Sox17
##   [4]     chr1 4773198-4785726      - |       27395      Mrpl15
##   [5]     chr1 4807893-4846735      + |       18777      Lypla1
##   [6]     chr1 4857694-4897909      + |       21399       Tcea1
##   -------
##   seqinfo: 21 sequences from mm10 genome

# add information on chromsome names to the GRanges object
seqlevels(geneRegions) <- as.character(unique(seqnames(geneRegions)))
geneRegions <- geneRegions[!is.na(mcols(geneRegions)$symbol)]
print(paste0("After removing missing gene names, we are left with ", length(geneRegions), " genes"))
## [1] "After removing missing gene names, we are left with 24333 genes"
seqlevels(peakRegions) <- as.character(unique(seqnames(peakRegions)))

#Create Gene Regions Then Remove Strand Column
if(useTSS){
  ArchR:::.logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = TRUE"))
  distMethod <- "GenePromoter"
  
  geneRegions$geneStart <- start(resize(geneRegions, 1, "start"))
  geneRegions$geneEnd <- start(resize(geneRegions, 1, "end"))
  # in the ranges column we want to have the gene start coordinates
  geneRegions <- resize(geneRegions, 1, "start")
  
  
  peakRegions$peakStart <- start(resize(peakRegions, 1, "start"))
  peakRegions$peakEnd <- end(resize(peakRegions, 1, "start"))
  
  # this will be ignored for now, since we only want to use the TSS as 1bp size
  # if(extendTSS){
  #   geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream)
  # }
  
  geneRegions$geneWeight <- geneScaleFactor
  
  
# This will be ignored for now, since we do not want to use gene bodies, but only TSS
}else{
  ArchR:::.logMessage(paste0(sampleName, " .addGeneScoreMat useTSS = FALSE"))
  distMethod <- "GeneBody"
  geneRegions$geneStart <- start(resize(geneRegions, 1, "start"))
  geneRegions$geneEnd <- start(resize(geneRegions, 1, "end"))
  geneRegions <- extendGR(gr = geneRegions, upstream = geneUpstream, downstream = geneDownstream)
  m <- 1 / width(geneRegions)
  geneRegions$geneWeight <- 1 + m * (geneScaleFactor - 1) / (max(m) - min(m))
}

#ArchR:::.logDiffTime(sprintf("Computing Gene Scores using distance relative to %s! ", distMethod), tstart, logFile = logFile)

#Add Gene Index For ArrowFile
geneRegions <- sort(sortSeqlevels(geneRegions), ignore.strand = TRUE)
peakRegions <- sort(sortSeqlevels(peakRegions), ignore.strand = TRUE)

#ArchR:::.logThis(geneRegions, paste0(sampleName, " .addGeneScoreMat geneRegions"), logFile = logFile)


# split geneRegions into a list of GRanges object, one for each chromosome 
#and add indices for each chromosome
geneRegions <- split(geneRegions, seqnames(geneRegions))
geneRegions <- lapply(geneRegions, function(x){
  mcols(x)$idx <- seq_along(x)
  return(x)
})

peakRegions <- split(peakRegions, seqnames(peakRegions))
peakRegions <- lapply(peakRegions, function(x){
  mcols(x)$idx <- seq_along(x)
  return(x)
})



# get peak names for the peak matrix
peak_df <- ArchR:::.getFeatureDF(ArrowFile, subGroup = "PeakMatrix")
peak_df <- peak_df %>% as.data.frame() %>%
  unite("rownames", seqnames, start, sep = ":", remove = FALSE) %>% 
  unite("rownames", rownames, end, sep = "-", remove = FALSE)


#Blacklist Split
if(!is.null(blacklist)){
  if(length(blacklist) > 0){
    # create a list of length of number of chromosomes
    # each element of the list contains the GRanges for the corresponding chromosome only
    blacklist <- split(blacklist, seqnames(blacklist))
  }
}

#Get all cell ids before constructing matrix
if(is.null(cellNames)){
  cellNames <- ArchR:::.availableCells(ArrowFile)
}

if(!is.null(allCells)){
  cellNames <- cellNames[cellNames %in% allCells]
}

tstart <- Sys.time()

  #########################################################################################################
  #First we will write gene scores to a temporary path! rhdf5 delete doesnt actually delete the memory!
  #########################################################################################################
totalGS <- ArchR:::.safelapply(seq_along(geneRegions), function(z){ # we loop over the z chromosomes here
  # geneRegions is a list with one Granges object for each chromosome

  totalGSz <- tryCatch({

    ArchR:::.logDiffTime(sprintf("Creating Temp GeneScoreMatrix for %s, Chr (%s of %s)!", sampleName, z, length(geneRegions)), 
      tstart, verbose = FALSE, logFile = logFile)

    #Get Gene Starts
    # extract Granges for chromosome z
    geneRegionz <- geneRegions[[z]]
    # order the entire GRanges object according to index 
    geneRegionz <- geneRegionz[order(geneRegionz$idx)]
    # get the chromosome information for this gene Region
    chrz <- paste0(unique(seqnames(geneRegionz)))
    
    peakRegionz <- peakRegions[[z]]
    peakRegionz <- peakRegionz[order(peakRegionz$idx)]
    chrp <- paste0(unique(seqnames(peakRegionz)))
    stopifnot(chrz == chrp)
    
    # get peak matrix
    peak_mat <- ArchR:::.getMatFromArrow(ArrowFile, binarize = FALSE, cellNames = cellNames, useMatrix = "PeakMatrix")
    # set rownames for the peak matrix
    rownames(peak_mat) <- peak_df$rownames
    # get peak matrix for cellnames 
    subset_peak_mat <- peak_mat[rownames(peak_mat) %in% names(peakRegionz), ]
    #stopifnot(max(subset_peak_mat <= 4))
    
    
    mcols(peakRegionz)$middle <- start(peakRegionz) + width(peakRegionz)/2
    uniquePeaks <- sort(unique(peakRegionz$middle))
    
    rm(peak_mat)
    gc()
    
    
    #Clean Memory
    #rm(uniqIns, fragSt, fragEd, fragBC)
    gc() 

    #Time to Overlap Gene Windows
    
    
    #### maybe for later
    if(useGeneBoundaries){

      geneStartz <- start(resize(geneRegionz, 1, "start"))
      geneEndz <- start(resize(geneRegionz, 1, "end"))

      pminGene <- pmin(geneStartz, geneEndz)
      pmaxGene <- pmax(geneStartz, geneEndz)

      idxMinus <- BiocGenerics::which(strand(geneRegionz) != "-")
  
      pReverse <- rep(max(extendDownstream), length(pminGene))
      pReverse[idxMinus] <- rep(max(extendUpstream), length(idxMinus))

      pReverseMin <- rep(min(extendDownstream), length(pminGene))
      pReverseMin[idxMinus] <- rep(min(extendUpstream), length(idxMinus))

      pForward <- rep(max(extendUpstream), length(pminGene))
      pForward[idxMinus] <- rep(max(extendDownstream), length(idxMinus))      

      pForwardMin <- rep(min(extendUpstream), length(pminGene))
      pForwardMin[idxMinus] <- rep(min(extendDownstream), length(idxMinus))      

      ################################################################
      #We will test when genes pass by another gene promoter
      ################################################################

      #Start of Range is based on the max observed gene ranged <- direction
      s <- pmax(
        c(1, pmaxGene[-length(pmaxGene)] + tileSize), 
        pminGene - pReverse
      )
      s <- pmin(pminGene - pReverseMin, s)

      #End of Range is based on the max observed gene ranged -> direction
      e <- pmin(
          c(pminGene[-1] - tileSize, pmaxGene[length(pmaxGene)] + pForward[length(pmaxGene)]), 
          pmaxGene + pForward
        )
      e <- pmax(pmaxGene + pForwardMin, e)

      extendedGeneRegion <- IRanges(start = s, end = e)

      idx1 <- which(pminGene - pReverseMin < start(extendedGeneRegion))
      if(length(idx1) > 0){
        stop("Error in gene boundaries minError")
      }

      idx2 <- which(pmaxGene + pForwardMin > end(extendedGeneRegion))
      if(length(idx2) > 0){
        stop("Error in gene boundaries maxError")
      }
     
     rm(s, e, pReverse, pReverseMin, pForward, pForwardMin, geneStartz, geneEndz, pminGene, pmaxGene)
     
     
     ###### interesting for me
    }else{ 
      
      # first we extend the gene region -> IRanges object, which contains start 
      # and end of the 200,000 bp gene regions/windows (+/- 100,000 from TSS) 
      extendedGeneRegion <- ranges(suppressWarnings(extendGR(geneRegionz, upstream = max(extendUpstream), downstream = max(extendDownstream))))

    }
    
    # Here I would like to use peaks which overlap instead of tiles
    # each row of tmp contains index of GeneRegion and the index of Tiles with 
    # which it overlaps
    # convert peakRegionz to IRanges object as well
    tmp <- suppressWarnings(findOverlaps(extendedGeneRegion, ranges(peakRegionz)))
    
    print(paste0("In total there are ", 
                 subjectLength(tmp), " peaks overlapping with gene regions found on this chromosome"))
    print(paste0("The total number of peaks found on this chromosome is ", length(peakRegionz)))
    
    print(tmp %>% as.data.frame() %>% 
      group_by(queryHits) %>% # gene region
      summarize(n = n()) %>% # get number of peaks overlapping with a gene region
      ggplot() + geom_bar(aes(x = n)) +
      labs(title = "number of peaks per gene region of size +/- 100kb from TSS", x = "number of peaks"))
    
    # by extracting the indices of genes and tiles which overlap and computing 
    # the distances between them we get for each match the distance between gene and tile
    
    # we want to compute the distance to the peak middle
    peak_middle_region <- peakRegionz
    start(peak_middle_region) = start(peak_middle_region) + floor(width(peak_middle_region) / 2)
    
    x <- distance(ranges(geneRegionz)[queryHits(tmp)], 
                  ranges(resize(peakRegionz, width = 1))[subjectHits(tmp)])
    
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of absolute distances between genes and tiles within a gene region",
           x = "distance"))
    

    #Determine Sign for Distance relative to strand (Directionality determined based on dist from gene start)
    # negative distance meaning  upstrea, positive distance downstream of gene
    
    # get Minus strand coordinates
    isMinus <- BiocGenerics::which(strand(geneRegionz) == "-")
    # subtract the gene start coordinate from the tile start coordinate -> relative distances
    signDist <- sign(start(ranges(peakRegionz))[subjectHits(tmp)] - 
                       start(resize(geneRegionz,1,"start"))[queryHits(tmp)])
    # convert the direction of distance for all distances corresponding to the negative strand
    signDist[isMinus] <- signDist[isMinus] * -1

    #Correct the orientation for the distance!
    # x contains absolute distances, but we want to give direction to distances
    x <- x * signDist
    
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of relative distances between genes and tiles within a gene region",
           x = "relative distance to TSS"))
    
    #Evaluate  Input Model -> compute distance weights
    x <- eval(parse(text=geneModel))
    
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of distance weights",
           x = "distance weight"))
    print(ggplot() + geom_histogram(aes(x = x), bins = 500) + 
      labs(title = "Distribution of distance weights",
           x = "distance weight", y = "log10(count)") + scale_y_log10())
    
    print("distanc weights ")
    x %>% head

    #Get Gene Weights Related to Gene Width, in our case simply multiply 
    # everything by 5, because we do not use the size of the gene
    x <- x * mcols(geneRegionz)$geneWeight[queryHits(tmp)]
    
    print("Distance weights scaled by gene weight, which is constant in our case: ")
    x %>% head

    #Remove Blacklisted Tiles!
    if(!is.null(blacklist)){
      if(length(blacklist) > 0){
        blacklistz <- blacklist[[chrz]]
        if(is.null(blacklistz) | length(blacklistz) > 0){
          tilesBlacklist <- 1 * (!overlapsAny(ranges(peakRegionz), ranges(blacklistz)))
          if(sum(tilesBlacklist == 0) > 0){
            x <- x * tilesBlacklist[subjectHits(tmp)] #Multiply Such That All Blacklisted Tiles weight is now 0!
          }
        }
      }
    }
    

    #Creating Sparse Matrix
    # this is genes x peaks, with distance weights 
    tmp <- Matrix::sparseMatrix(
      i = queryHits(tmp), 
      j = subjectHits(tmp), 
      x = x, 
      dims = c(length(geneRegionz), nrow(subset_peak_mat))
    )

    # Number of peaks for each gene
    print(ggplot() + geom_histogram(aes(x = rowSums(tmp)), bins = 200) +
      labs(title = "Sum of distance weights for each gene",
           x = "total distance weights per gene")) #+ scale_y_log10()
    
    # Number of genes for each peak
    print(ggplot() + geom_histogram(aes(x = colSums(tmp)), bins = 200) +
      labs(title = "Sum of distance weights for each peak",
           x = "total distance weights per peak"))
    
    
    #Calculate Gene Scores
    matGS <- tmp %*% subset_peak_mat
    colnames(matGS) <- cellNames

    totalGSz <- Matrix::colSums(matGS)
    print(paste0("For chromosome ", chrz, " there are ", length(totalGSz), " cells, meaning that there will be as many total gene activity scores."))
    print(ggplot() + geom_histogram(aes(x = totalGSz), bins = 500) +
      labs(title = "Total Gene activity in each cell", x = "total gene activity"))

    #Save tmp file
    .safeSaveRDS(matGS, file = paste0(tmpFile, "-", chrz, ".rds"), compress = FALSE)

    #Clean Memory
    rm(isMinus, signDist, extendedGeneRegion, uniqueTiles)
    rm(matGS, tmp)
    gc()

    totalGSz
 
  }, error = function(e){

    errorList <- list(
      ArrowFile = ArrowFile,
      geneRegions = geneRegions,
      blacklist = blacklist,
      chr = chrz,
      totalGSz = if(exists("totalGSz", inherits = FALSE)) totalGSz else "totalGSz",
      matGS = if(exists("matGS", inherits = FALSE)) matGS else "matGS"
    )

   # ArchR:::.logError(e, fn = "ArchR:::.addGeneScoreMat TmpGS", info = sampleName, errorList = errorList, logFile = logFile)

  })

  totalGSz

})#, threads = subThreads) %>% Reduce("+", .)
## [1] "In total there are 12260 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 12260"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 2 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr1 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 14119 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 14119"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr2 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 8959 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 8959"

## Warning: Transformation introduced infinite values in continuous y-axis
## Removed 2 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr3 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 12236 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 12236"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 1 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr4 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 11745 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 11745"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr5 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 9910 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 9910"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr6 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 11130 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 11130"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr7 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 9851 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 9851"

## Warning: Transformation introduced infinite values in continuous y-axis
## Removed 1 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr8 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 10205 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 10205"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr9 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 7661 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 7661"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 6 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr12 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 8094 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 8094"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr13 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 7151 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 7151"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 6 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr14 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 7375 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 7375"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr15 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 5649 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 5649"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 21 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr16 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 7720 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 7720"

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr17 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 5809 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 5809"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 17 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr18 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 5421 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 5421"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 3 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chr19 there are 7702 cells, meaning that there will be as many total gene activity scores."

## [1] "In total there are 3033 peaks overlapping with gene regions found on this chromosome"
## [1] "The total number of peaks found on this chromosome is 3033"

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 61 rows containing missing values (geom_bar).

## [1] "distanc weights "
## [1] "Distance weights scaled by gene weight, which is constant in our case: "

## [1] "For chromosome chrX there are 7702 cells, meaning that there will be as many total gene activity scores."